Smart-seq2 Quality Control
[1] "Seurat is loaded correctly"
[1] "viridis is loaded correctly"
[1] "ggpubr is loaded correctly"
[1] "cowplot is loaded correctly"
[1] "ggplot2bdc is loaded correctly"
[1] "patchwork is loaded correctly"
This will be helpful later on. This contains annotations for each gene:
##Import gtf file:
gtf <- read.table("/Users/Andy/GCSKO/GCSKO_analysis_git/data/Reference/Pberghei.gtf", sep="\t", header = FALSE)
## inspect
head(gtf)
*N.B.This was the .gtf file used for feature counting the data
## read in counts
counts <- read.delim("/Users/Andy/GCSKO/GCSKO_analysis_git/data/Smartseq2/counts_2020.csv", sep = ",", stringsAsFactors = FALSE, row.names = 1)
## read in pheno
pheno <- read.delim("/Users/Andy/GCSKO/GCSKO_analysis_git/data/Smartseq2/pheno_2020.csv", sep = ",", stringsAsFactors = FALSE)
## check dimensions of both
expected_number_of_cells <- ( 384 * 12)
paste("The expected number of cells is", expected_number_of_cells)
[1] "The expected number of cells is 4608"
paste("number of genes in counts", dim(counts)[1], "number of cells in counts", dim(counts)[2])
[1] "number of genes in counts 5250 number of cells in counts 4608"
paste("number of columns in counts", dim(pheno)[2], "number of cells in pheno", dim(pheno)[1])
[1] "number of columns in counts 98 number of cells in pheno 4608"
## make rownames the sample_id in pheno
rownames(pheno) <- pheno$sample_id
## check that the names are the same
paste("Are the names the same?")
[1] "Are the names the same?"
table((colnames(counts) %in% rownames(pheno)))
TRUE
4608
These cells were included in one of the sequencing runs and are irrelevant.
## get rid of the cells by finding all the cells that have As in their species col
names_of_mosquito_cells <- rownames(pheno[(pheno$species == "As"), ])
## which gives
table(colnames(counts) %in% names_of_mosquito_cells)
FALSE TRUE
4512 96
## subset
counts <- counts[,-which(colnames(counts) %in% names_of_mosquito_cells)]
pheno <- pheno[-(which(rownames(pheno) %in% names_of_mosquito_cells)), ]
## check dimensions
paste("number of genes in counts", dim(counts)[1], "number of cells in counts", dim(counts)[2])
[1] "number of genes in counts 5250 number of cells in counts 4512"
paste("number of columns in counts", dim(pheno)[2], "number of cells in pheno", dim(pheno)[1])
[1] "number of columns in counts 98 number of cells in pheno 4512"
## find all cells that are labelled as such
names_of_control_cells <- rownames(pheno[(pheno$is_control == "TRUE"), ])
## which gives
table(colnames(counts) %in% names_of_control_cells)
FALSE TRUE
4386 126
## subset
counts <- counts[,-which(colnames(counts) %in% names_of_control_cells)]
pheno <- pheno[-(which(rownames(pheno) %in% names_of_control_cells)), ]
## check dimensions
paste("number of genes in counts", dim(counts)[1], "number of cells in counts", dim(counts)[2])
[1] "number of genes in counts 5250 number of cells in counts 4386"
paste("number of columns in counts", dim(pheno)[2], "number of cells in pheno", dim(pheno)[1])
[1] "number of columns in counts 98 number of cells in pheno 4386"
10 and 27 failed genotyping and so will be removed
## find all cells that are labelled as such
names_of_genotype_fail_cells <- rownames(pheno[(pheno$sub_identity_updated %in% c("GCSKO-10", "WT-10", "GCSKO-27")), ])
## which gives
table(colnames(counts) %in% names_of_genotype_fail_cells)
FALSE TRUE
3732 654
## subset
counts <- counts[,-which(colnames(counts) %in% names_of_genotype_fail_cells)]
pheno <- pheno[-(which(rownames(pheno) %in% names_of_genotype_fail_cells)), ]
## check dimensions
paste("number of genes in counts", dim(counts)[1], "number of cells in counts", dim(counts)[2])
[1] "number of genes in counts 5250 number of cells in counts 3732"
paste("number of columns in counts", dim(pheno)[2], "number of cells in pheno", dim(pheno)[1])
[1] "number of columns in counts 98 number of cells in pheno 3732"
Filter out the QC columns contained in counts that do not correspond to genes
## since the datatable currently contains:
paste("The current counts table contains these non-gene rows:")
[1] "The current counts table contains these non-gene rows:"
rownames(counts[-(grep("PBANKA*", rownames(counts))), ])
[1] "__alignment_not_unique" "__ambiguous" "__no_feature"
[4] "__not_aligned" "__too_low_aQual"
## subset
# make a copy of counts just in case that info is useful in future analysis
counts_genes <- counts
# remove non-gene rows
counts <- counts[(grep("PBANKA*", rownames(counts))), ]
#check
paste("number of genes in counts", dim(counts)[1], "number of cells in counts", dim(counts)[2])
[1] "number of genes in counts 5245 number of cells in counts 3732"
This 5245 includes all types of genes:
## check size
paste("The dimensions of the GTF file are:")
[1] "The dimensions of the GTF file are:"
dim(gtf[-which(gtf$V3 == "CDS"),])
[1] 5245 9
## look at possible types of genes
paste("The types of genes in the dataframe are:")
[1] "The types of genes in the dataframe are:"
names(table(gtf$V3))
[1] "CDS" "mRNA" "ncRNA"
[4] "pseudogenic_transcript" "rRNA" "snoRNA"
[7] "snRNA" "tRNA"
make seurat object
## make seurat object
ss2_mutants <- CreateSeuratObject(counts = counts, project = "GCSKO", min.cells = 0, min.features = 1, meta.data = pheno)
Feature names cannot have underscores ('_'), replacing with dashes ('-')
## n.b. cells must have at least 1 gene per cell because otherwise this causes problems downstream
## add experiment column to meta data:
ss2_mutants@meta.data$experiment <- "ss2_mutants"
## inspect object
ss2_mutants
An object of class Seurat
5245 features across 3705 samples within 1 assay
Active assay: RNA (5245 features, 0 variable features)
## how many genes are not detected at all?
paste(length(which(rowSums(as.matrix(ss2_mutants@assays$RNA@counts)) == 0)), "genes are not detected in any cell")
[1] "63 genes are not detected in any cell"
[1] "27 cells have zero counts"
set up data
## make dataframe
df_platemap <- data.frame(well_name = pheno$well_name_R, cell_name = rownames(pheno), row.names = rownames(pheno))
## get cells that are filtered out
cells_kept <- which(rownames(df_platemap) %in% rownames(ss2_mutants@meta.data))
## make extra column in plotting df
df_platemap$colour <- "filtered_out"
df_platemap$colour[cells_kept] <- "passed_QC"
## inspect
#head(df_platemap)
##Make a table for each
##make a new dataframe for plotting
table_platemap <- as.data.frame(table(df_platemap$well_name, df_platemap$colour))
## modify names of dataframe
names(table_platemap) <- c("well", "filtered", "frequency")
#make separate dfs
df_1 <- table_platemap[table_platemap$filtered == "filtered_out",]
df_2 <- table_platemap[table_platemap$filtered == "passed_QC",]
# merge them back together
table_platemap <- merge(df_1, df_2, by = "well", all = TRUE)
## remove the irrelevant rows and rename cols and rename rows
table_platemap <- table_platemap[ ,c(1,3,5)]
names(table_platemap) <- c("well", "filtered_out", "passed_QC")
table_platemap$well_name <- rownames(table_platemap)
## add A1?
#table_platemap[96,] <- c("A1", "0", "0")
## add cols for plotting
table_platemap$Row <- as.numeric(match(toupper(substr(table_platemap$well, 1, 1)), LETTERS))
table_platemap$Column <- as.numeric(substr(table_platemap$well, 2, 5))
## calculate a percentage
table_platemap$filtered_pc <- ((table_platemap$filtered_out)/(table_platemap$filtered_out + table_platemap$passed_QC))*100
## due to an error in the scale_y_reverse, you need to make a column of 'reversed' row values to plot
for(i in seq(1,8)){
table_platemap$Row_rev[table_platemap$Row == i] <- seq(8,1)[i]
}
## find wells where it's zero
zero_wells <- table_platemap$filtered_pc == 0
table_platemap$filtered_pc[zero_wells] <- NA
Plot
## plot
sample_map_no_reads <- ggplot(data=table_platemap, aes(x=Column, y=Row_rev)) +
#set up the platemap layout
geom_point(data=expand.grid(seq(1,12), seq(1,8)), aes(x=Var1, y=Var2), color="grey90", fill="white", shape=21, size=8) +
#Change the shape and colour of points for a variable
geom_point(aes(colour = filtered_pc), size = 7) +
#change the colours
scale_colour_viridis_c(guide = "colourbar", na.value="white") +
#fix the ratio of coordinates
coord_fixed(ratio=(13/12)/(9/8), xlim=c(0.5, 12.5), ylim=c(0.6, 8.4)) +
# make into a plate plot
theme_bdc_microtiter() +
#add labels for the y axis
scale_y_continuous(breaks = seq(1, 8), labels = LETTERS[8:1]) +
#add labels for the x axis
scale_x_continuous(position = "top", breaks=seq(1, 12)) +
#Add a title and change label of fill
labs(title="The position of cells with zero counts" , size = 6, colour = "percentage of cells in this well with zero counts") +
#Improve aes of guides
guides(colour = guide_colorbar(barwidth = 4, barheight = 1, title="percentage of cells in this well with zero counts")) +
# display legend on bottom and make text bigger
theme(legend.position="bottom", legend.text=element_text(size=10), legend.title = element_text(size = 10))
## print
sample_map_no_reads
## save
ggsave("../images_to_export/QC_SS2_platemap_no_count.png", plot = sample_map_no_reads, device = "png", path = NULL, scale = 1, width = 10, height = 10, units = "cm", dpi = 300, limitsize = TRUE)
Mitochondrial cell counts
## old method
## This old method just picked out rRNA genes as a rough estimate, the method below picks out all genes expressed from the mitchondrial genome
## extract mitochondrial genes
#mito_genes <- gtf[which(gtf$V3 == "rRNA"),]$V9
#mito_genes <- gsub(";.*","", gsub("gene_id ", "", mito_genes))
#paste("These are the mitochondrial genes")
#head(mito_genes)
## make a percentage mitocondrial for each cell (this will work as long as you filter cells out with zero counts)
#ss2_mutants <- PercentageFeatureSet(ss2_mutants, features = which(rownames(counts) %in% mito_genes), col.name = "percent.mt")
## extract mito genes
mito_genes <- ss2_mutants@assays$RNA@counts@Dimnames[[1]][grep("^PBANKA-MIT",ss2_mutants@assays$RNA@counts@Dimnames[[1]])]
## plot the genes individually
VlnPlot(object = ss2_mutants, features = mito_genes, pt.size = 0.01, group.by = "experiment")
## make a percentage mitocondrial for each cell (this will work as long as you filter cells out with zero counts)
ss2_mutants <- PercentageFeatureSet(ss2_mutants, pattern = "^PBANKA-MIT", col.name = "percent.mt")
plot percentage mitochondrial
## plot for percentage of mitochondrial reads
v1 <- VlnPlot(object = ss2_mutants, features = "percent.mt", group.by = "sub_identity_updated", pt.size = 0.01) +
## add a line where we will filter
geom_hline(yintercept=20) +
## remove legend
theme(legend.position = "none") +
## change labels
labs(x = "Genotype", y = "% Mitochondrial Reads") +
## remove plot title
theme(plot.title = element_blank())
## plot for percentage of mitochondrial reads
v2 <- VlnPlot(object = ss2_mutants, features = "percent.mt", group.by = "experiment", pt.size = 0.01) +
geom_hline(yintercept=20) +
## remove legend
theme(legend.position = "none") +
## fremove axis labels
labs(x="", y = "") +
## remove axis elements
theme(plot.title = element_blank(), axis.text.y = element_blank(),
axis.ticks.y = element_blank()) +
## change label on bottom of plot
scale_x_discrete(labels = "All cells")
## plot together
QC_mito_violin <- v1 + v2 + plot_layout(ncol = 2, nrow = 1, widths = c(4, 1), heights = c(2, 2))
## print
QC_mito_violin
## save
ggsave("../images_to_export/QC_SS2_mito_violin.png", plot = QC_mito_violin, device = "png", path = NULL, scale = 1, width = 15, height = 10, units = "cm", dpi = 300, limitsize = TRUE)
make a dataframe for plotting
df <- data.frame(nCount = log10(ss2_mutants@meta.data$nCount_RNA), nGenes = ss2_mutants@meta.data$nFeature_RNA, identity = ss2_mutants@meta.data$identity_updated, identity_gene = ss2_mutants@meta.data$identity_gene_updated, percent_mt = ss2_mutants@meta.data$percent.mt)
Where do mitchondrial poor cells lie?
## make extra col for filtered values:
filtered_out_cells <- which(df$percent_mt > 20)
## plot
QC_mito_graph <- ggplot(df, aes(x = nCount, y = nGenes)) +
geom_point(alpha = 0.4) +
scale_size(range = c(0.1,4)) +
#geom_rug() +
scale_y_continuous(name = "Number of Detected Genes") +
scale_x_continuous(name = "log10(Number of Total Counts)") +
scale_color_viridis(option = "D") +
theme_pubr() +
theme(legend.position = "bottom") +
geom_vline(xintercept=3) +
geom_hline(yintercept=220) +
geom_hline(yintercept=3300) +
#annotate selected points
annotate("point", df$nCount[filtered_out_cells], df$nGenes[filtered_out_cells], size = 2, colour = "orange")
## add this to gemo_point if needed: aes(colour = percent_mt, size = percent_mt)
## print
QC_mito_graph
## save
ggsave("../images_to_export/QC_SS2_mito_graph.png", plot = QC_mito_graph, device = "png", path = NULL, scale = 1, width = 10, height = 10, units = "cm", dpi = 300, limitsize = TRUE)
## plot 1
plot1 <- FeatureScatter(ss2_mutants, feature1 = "nCount_RNA", feature2 = "percent.mt", pt.size = 0.01, group.by = "identity_updated")
## plot 2
plot2 <- FeatureScatter(ss2_mutants, feature1 = "nCount_RNA", feature2 = "nFeature_RNA", pt.size = 0.01, group.by = "identity_updated")
## plot together
plot1 + plot2
in the MCA paper: “Cells with fewer than 1000 genes per cell and 2500 reads per cell were removed from the liver-stage parasites, trophozoites, male and female gametocytes, ookinetes, ookinetes/oocysts, and oocyst stages. Cells with fewer than 500 genes per cell and 2500 reads per cell were removed from schizonts and injected sporozoites. Cells with fewer than 40 genes per cell and 1000 reads per cell were removed from merozoites, rings, and gland sporozoites (fig. S2 and table S1). Additionally, we removed genes from further analysis that were detected in fewer than two cells across the entire dataset. The final dataset contained 1787 high-quality single cells from 1982 sequenced cells and 5156 genes out of 5245 genes with annotated transcripts.”
so use: 1000 reads per cell & 40 genes per cell as absolute minimums
## plot main dotplot
plot1 <- ggplot(df, aes(x = nCount, y = nGenes, color = identity)) +
geom_point(aes(color = identity), size = 0.1) +
geom_rug() +
scale_y_continuous(name = "Number of Detected Genes") +
scale_x_continuous(name = "log10(Number of Total Counts)") +
theme_pubr() +
theme(legend.position = "bottom") +
geom_vline(xintercept=3) +
geom_hline(yintercept=220) +
geom_hline(yintercept=3300) +
## improve aesthetics of guide
guides(colour = guide_legend(override.aes = list(size=4)), title="Genotype")
## plot density plot x
dens1 <- ggplot(df, aes(x = nCount, fill = identity)) +
geom_density(alpha = 0.2) +
theme_void() +
theme(legend.position = "none")
## plot density plot y
dens2 <- ggplot(df, aes(x = nGenes, fill = identity)) +
geom_density(alpha = 0.2) +
theme_void() +
theme(legend.position = "none") +
coord_flip()
## plot together
QC_composite_plot <- dens1 + plot_spacer() + plot1 + dens2 + plot_layout(ncol = 2, nrow = 2, widths = c(4, 1), heights = c(1, 4))
## print
QC_composite_plot
## save
ggsave("../images_to_export/QC_SS2_composite_plot.png", plot = QC_composite_plot, device = "png", path = NULL, scale = 1, width = 20, height = 20, units = "cm", dpi = 300, limitsize = TRUE)
filter out cells
## subset
ss2_mutants_final <- subset(ss2_mutants, subset = nFeature_RNA > 220 & nFeature_RNA < 3300 & percent.mt < 20 & nCount_RNA > 1000)
## inspect resultant object
ss2_mutants_final
set up data
## make dataframe
df_platemap <- data.frame(well_name = pheno$well_name_R, cell_name = rownames(pheno), row.names = rownames(pheno))
## get cells that are filtered out
cells_kept <- which(rownames(df_platemap) %in% rownames(ss2_mutants_final@meta.data))
## make extra column in plotting df
df_platemap$colour <- "filtered_out"
df_platemap$colour[cells_kept] <- "passed_QC"
## inspect
head(df_platemap)
## make a new dataframe with metrics of interest
table_platemap <- as.data.frame(table(df_platemap$well_name, df_platemap$colour))
names(table_platemap) <- c("well", "filtered", "frequency")
# make separate dfs
df_1 <- table_platemap[table_platemap$filtered == "filtered_out",]
df_2 <- table_platemap[table_platemap$filtered == "passed_QC",]
# merge them back together
table_platemap <- merge(df_1, df_2, by = "well", all = TRUE)
## remove the irrelevant rows and rename cols and rename rows
table_platemap <- table_platemap[ ,c(1,3,5)]
names(table_platemap) <- c("well", "filtered_out", "passed_QC")
table_platemap$well_name <- rownames(table_platemap)
## add cols for plotting
table_platemap$Row <- as.numeric(match(toupper(substr(table_platemap$well, 1, 1)), LETTERS))
table_platemap$Column <- as.numeric(substr(table_platemap$well, 2, 5))
## inspect:
head(table_platemap)
## calculate percentage
table_platemap$filtered_pc <- ((table_platemap$filtered_out)/(table_platemap$filtered_out + table_platemap$passed_QC))*100
## due to an error in the scale_y_reverse, you need to make a column of 'reversed' row values to plot
for(i in seq(1,8)){
table_platemap$Row_rev[table_platemap$Row == i] <- seq(8,1)[i]
}
zero_wells <- table_platemap$filtered_pc == 0
table_platemap$filtered_pc[zero_wells] <- NA
plot
## plot
platemap_failed_qc <- ggplot(data=table_platemap, aes(x=Column, y=Row_rev)) +
#set up the platemap layout
geom_point(data=expand.grid(seq(1,12), seq(1,8)), aes(x=Var1, y=Var2), color="grey90", fill="white", shape=21, size=8) +
#Change the shape and colour of points for a variable
geom_point(aes(colour = filtered_pc), size = 7) +
#change the colours
scale_colour_viridis_c(guide = "colourbar", na.value="white") +
#Add a title and change label of fill
labs(title="The position of cells that failed QC", size = 6) +
#fix the ratio of coordinates
coord_fixed(ratio=(13/12)/(9/8), xlim=c(0.5, 12.5), ylim=c(0.6, 8.4)) +
# make into a plate plot
theme_bdc_microtiter() +
#add labels for the y axis
scale_y_continuous(breaks = seq(1, 8), labels = LETTERS[8:1]) +
#add labels for the x axis
scale_x_continuous(position = "top", breaks=seq(1, 12)) +
#Improve aes of guides
guides(colour = guide_colorbar(barwidth = 4, barheight = 1, title="percentage of cells in well that failed QC")) +
# display legend on bottom and make text bigger
theme(legend.position="bottom", legend.text=element_text(size=10), legend.title = element_text(size = 9))
## print
platemap_failed_qc
## save
ggsave("../images_to_export/QC_SS2_platemap_failed_qc.png", plot = platemap_failed_qc, device = "png", path = NULL, scale = 1, width = 10, height = 10, units = "cm", dpi = 300, limitsize = TRUE)
Make a bar graph that describes the number of failed cells by distance from the edge of the plate
## get lists of cells for each square around the plate
total_0 <- table_platemap[which(table_platemap$Row %in% c(1,8) | table_platemap$Column %in% c(1,12)), ]
total_1 <- table_platemap[which(table_platemap$Row %in% c(2,7) | table_platemap$Column %in% c(2,11)), ]
total_1 <- total_1[-which(total_1$well %in% total_0$well),]
total_2 <- table_platemap[which(table_platemap$Row %in% c(3,6) | table_platemap$Column %in% c(3,10)), ]
total_2 <- total_2[-which(total_2$well %in% total_0$well | total_2$well %in% total_1$well),]
total_3 <- table_platemap[which(table_platemap$Row %in% c(4,5) | table_platemap$Column %in% c(4,9)), ]
total_3 <- total_3[-which(total_3$well %in% total_0$well | total_3$well %in% total_1$well | total_3$well %in% total_2$well),]
## create a dataframe which calculates the percentage failed for each distance from the edge
frequency_of_failed_cells <- data.frame(percentage_failed =
c(
sum(total_0$filtered_out) / (sum(total_0$passed_QC) + sum(total_0$filtered_out)),
sum(total_1$filtered_out) / (sum(total_1$passed_QC) + sum(total_1$filtered_out)),
sum(total_2$filtered_out) / (sum(total_2$passed_QC) + sum(total_2$filtered_out)),
sum(total_3$filtered_out) / (sum(total_3$passed_QC) + sum(total_3$filtered_out))
))
## neaten the dataframe
frequency_of_failed_cells$distance_from_edge <- c(0,1,2,3)
position_qc_fail <- ggplot(frequency_of_failed_cells, aes(x=distance_from_edge, y=percentage_failed)) +
geom_bar(stat="identity", fill="black")+
theme_classic() +
theme(panel.grid.major.x = element_blank(), panel.grid.minor.x = element_blank(), text = element_text(size=12)) +
## change the axis titles
labs(y= "Cells that failed QC (%)", x = "Number of wells from the edge of the plate") +
## prevent the title going off the edge of the plot
coord_cartesian(clip = 'off')
## save
ggsave("../images_to_export/QC_SS2_failed_position.png", plot = position_qc_fail, device = "png", path = NULL, scale = 1, width = 10, height = 10, units = "cm", dpi = 300, limitsize = TRUE)
## plot
position_qc_fail
prepare
## make dataframe
df_mapping_rates_hisat <- data.frame(alignment_rate = pheno$overall_hisat2_alignment_rate, cell_name = rownames(pheno), row.names = rownames(pheno))
## get cells that are filtered out
cells_filtered <- which(rownames(df_mapping_rates_hisat) %in% rownames(ss2_mutants_final@meta.data))
## make extra column in plotting df
df_mapping_rates_hisat$colour <- "Failed QC"
df_mapping_rates_hisat$colour[cells_filtered] <- "Passed QC"
## plot
mapping_rate_plot <- ggplot(df_mapping_rates_hisat, aes(x = reorder(cell_name,alignment_rate,sum), y=alignment_rate, fill = colour)) +
geom_col() +
theme(legend.position = "none") +
labs(x="Cells", y = "Overall Alignment Rate (%)", size = 15, fill = "QC Result") +
theme_classic() +
theme(plot.title = element_blank(), axis.text.x = element_blank(),
axis.ticks.x = element_blank(), text = element_text(size=20)) +
## change the colours
scale_fill_manual(values=c("red2", "green4"))
## save
ggsave("../images_to_export/QC_SS2_mapping_rate.eps", plot = mapping_rate_plot, device = "eps", path = NULL, scale = 1, width = 20, height = 10, units = "cm", dpi = 300, limitsize = TRUE)
## plot
mapping_rate_plot
How many cells are recovered per condition?
## how many plates?
plate_investigation <- pheno[which(pheno$sub_identity_updated == "GCSKO-20" | pheno$sub_identity_updated == "GCSKO-2" | pheno$sub_identity_updated == "WT-2" | pheno$sub_identity_updated == "WT-20"), ]
table(plate_investigation$plate_id_unique)
GCSKO-10_plate_1 GCSKO-10_plate_2 GCSKO-10_plate_3 GCSKO-10_plate_4
93 93 93 93
GCSKO-2_plate_1 GCSKO-2_plate_2
92 92
##plot
QC_by_genotype <- ggplot(df_cells_recovered, aes(x=genotype, y=recovered_pc ,fill=genotype)) +
geom_bar(stat = "identity") +
ylim(0, 100) +
#scale_fill_brewer(palette = "Set1") +
geom_text(aes(label=recovered_pc_rounded, vjust=0.5, hjust = 1.5)) +
geom_text(aes(label=number_cells_annotation, vjust=0.5, hjust = -0.05)) +
labs(y = "Percentage of cells recovered", x = "Genotype") +
coord_flip(clip = "off") +
theme_classic() +
## remove legend + add border so it doesn't clip the annotation
theme(legend.position="none", plot.margin = unit(c(1,3,1,3), "lines"))
## plot
QC_by_genotype
## save
ggsave("../images_to_export/QC_SS2_by_genotype.png", plot = QC_by_genotype, device = "png", path = NULL, scale = 1, width = 15, height = 10, units = "cm", dpi = 300, limitsize = TRUE)
Investigate 20 and 2 further
## how many plates?
plate_investigation <- pheno[which(pheno$sub_identity_updated == "GCSKO-20" | pheno$sub_identity_updated == "GCSKO-2" | pheno$sub_identity_updated == "WT-2" | pheno$sub_identity_updated == "WT-20"), ]
table(plate_investigation$plate_id_unique)
add columns if it failed QC:
## get cells that are filtered out
cells_kept <- which(rownames(plate_investigation) %in% rownames(ss2_mutants_final@meta.data))
## make extra column in plotting df
plate_investigation$colour <- "filtered_out"
plate_investigation$colour[cells_kept] <- "passed_QC"
table(plate_investigation$colour)
## add cols for plotting
plate_investigation$Row <- as.numeric(match(toupper(substr(plate_investigation$well_name_R, 1, 1)), LETTERS))
plate_investigation$Column <- as.numeric(substr(plate_investigation$well_name_R, 2, 5))
plot
## load this library to use grid.arrange function
library(gridExtra)
## use this function to extract legend:
## source: https://stackoverflow.com/questions/13649473/add-a-common-legend-for-combined-ggplots
## source: https://github.com/hadley/ggplot2/wiki/Share-a-legend-between-two-ggplot2-graphs
g_legend<-function(a.gplot){
tmp <- ggplot_gtable(ggplot_build(a.gplot))
leg <- which(sapply(tmp$grobs, function(x) x$name) == "guide-box")
legend <- tmp$grobs[[leg]]
return(legend)}
## get legend
mylegend<-g_legend(plot_list[[1]])
## make a final plot
QC_failed_plates <- grid.arrange(arrangeGrob(plot_list[[1]] + theme(legend.position="none") + labs(title = paste("Mutant 20", "\n", "plate 1")) + theme(plot.title = element_text(hjust = 0.5)),
plot_list[[2]] + theme(legend.position="none") + labs(title = paste("Mutant 20", "\n", "plate 2")) + theme(plot.title = element_text(hjust = 0.5)),
plot_list[[3]] + theme(legend.position="none") + labs(title = paste("Mutant 20", "\n", "plate 3")) + theme(plot.title = element_text(hjust = 0.5)),
plot_list[[4]] + theme(legend.position="none") + labs(title = paste("Mutant 20", "\n", "plate 4")) + theme(plot.title = element_text(hjust = 0.5)),
plot_list[[5]] + theme(legend.position="none") + labs(title = paste("Mutant 2", "\n", "plate 1")) + theme(plot.title = element_text(hjust = 0.5)),
plot_list[[6]] + theme(legend.position="none") + labs(title = paste("Mutant 2", "\n", "plate 2")) + theme(plot.title = element_text(hjust = 0.5)), nrow=2),
mylegend, nrow=2,heights=c(10,2))
## plot
QC_failed_plates
TableGrob (2 x 1) "arrange": 2 grobs
## save
ggsave("/Users/Andy/GCSKO/GCSKO_analysis_git/images_to_export/QC_SS2_failed_plates.png", plot = QC_failed_plates, device = "png", path = NULL, scale = 1, width = 27, height = 25, units = "cm", dpi = 300, limitsize = TRUE)
## load this library to use grid.arrange function
library(gridExtra)
## use this function to extract legend:
## source: https://stackoverflow.com/questions/13649473/add-a-common-legend-for-combined-ggplots
## source: https://github.com/hadley/ggplot2/wiki/Share-a-legend-between-two-ggplot2-graphs
g_legend<-function(a.gplot){
tmp <- ggplot_gtable(ggplot_build(a.gplot))
leg <- which(sapply(tmp$grobs, function(x) x$name) == "guide-box")
legend <- tmp$grobs[[leg]]
return(legend)}
## get legend
mylegend<-g_legend(plot_list[[1]])
## make a final plot
QC_failed_plates <- grid.arrange(arrangeGrob(plot_list[[1]] + theme(legend.position="none") + labs(title = paste("Mutant 20", "\n", "plate 1")) + theme(plot.title = element_text(hjust = 0.5)),
plot_list[[2]] + theme(legend.position="none") + labs(title = paste("Mutant 20", "\n", "plate 2")) + theme(plot.title = element_text(hjust = 0.5)),
plot_list[[3]] + theme(legend.position="none") + labs(title = paste("Mutant 20", "\n", "plate 3")) + theme(plot.title = element_text(hjust = 0.5)),
plot_list[[4]] + theme(legend.position="none") + labs(title = paste("Mutant 20", "\n", "plate 4")) + theme(plot.title = element_text(hjust = 0.5)),
plot_list[[5]] + theme(legend.position="none") + labs(title = paste("Mutant 2", "\n", "plate 1")) + theme(plot.title = element_text(hjust = 0.5)),
plot_list[[6]] + theme(legend.position="none") + labs(title = paste("Mutant 2", "\n", "plate 2")) + theme(plot.title = element_text(hjust = 0.5)), nrow=2),
mylegend, nrow=2,heights=c(10,2))
## plot
QC_failed_plates
## save
ggsave("../images_to_export/QC_failed_plates.png", plot = QC_SS2_failed_plates, device = "png", path = NULL, scale = 1, width = 27, height = 25, units = "cm", dpi = 300, limitsize = TRUE)
Add in bulk data predictions
#Pb Prediction correlations with bulk data (asexual hoo):
#Load in required package:
library(Hmisc)
#Cooerce expression data into a matrix and load in the reference timecourse data:
x10 <- as.matrix(ss2_mutants_final@assays$RNA@data)
rownames(x10) <- gsub("-", "_", rownames(x10))
#read in bulk data:
hoo<-as.matrix(read.table("../data/Reference/hoo_berg2.txt",header=T, row.names=1))
#Make a blank dataframe in which to add prediction:
df <- data.frame(matrix(ncol = 4, nrow = 0))
colnames(df) <- c("Prediction(Spearman)","r(Spearman)","Prediction(Pearsons)","r(Pearsons)")
#Do correlations with bulk data using both Spearman and Pearson (and the top 1000 genes):
for (i in 1:ncol(x10))
{
shared<-intersect(row.names(as.matrix(head(sort(x10[,i], decreasing=TRUE),1000))),row.names(hoo))
step0<-rcorr(x10[shared,i],hoo[shared,1:12],type = "spearman")
step1<-as.matrix(t(step0$r[2:13,1]))
step2<-rcorr(x10[shared,i],hoo[shared,1:12],type = "pearson")
step3<-as.matrix(t(step2$r[2:13,1]))
step4<-cbind(colnames(step1)[which.max(step1)],step1[which.max(step1)],colnames(step3)[which.max(step3)],step3[which.max(step3)])
colnames(step4) <- c("Prediction(Spearman)","r(Spearman)","Prediction(Pearsons)","r(Pearsons)")
rownames(step4)<-colnames(x10)[i]
df<-rbind(df,step4)
}
#Write out data into a csv file:
#write.csv(dfringr,file="/Users/ar19/Desktop/PhD/AR04_GCSKO_project/All_mutants_Feb_2018/predictionpbcombined.csv")
#Change the format of the output to make it more readable:
#gsub("Pb_","", dfringr[,1]) - Make predictions into 18hr.dat format:
#spearman:
df[,1] <- gsub("Pb_","", df[,1])
#Remove hr.dat from list:
df[,1] <- gsub("hr.dat","", df[,1])
#Check - dfringr[,1]
#Make into a number:
df[,1] <- as.numeric(df[,1])
df[,2] <- as.numeric(as.character(df[,2]))
#pearson:
df[,3] <- gsub("Pb_","", df[,3])
#Remove hr.dat from list:
df[,3] <- gsub("hr.dat","", df[,3])
#Check - dfringr[,1]
#Make into a number:
df[,3] <- as.numeric(df[,3])
df[,4] <- as.numeric(as.character(df[,4]))
#add to 10X object:
ss2_mutants_final <- AddMetaData(ss2_mutants_final, metadata = df)
Can also do with Kasia’s timecourse data:
kas<-as.matrix(read.table("../data/Reference/AP2OETC.txt",header=T, row.names=1))
#Make a blank dataframe in which to add prediction:
dfs <- data.frame(matrix(ncol = 4, nrow = 0))
colnames(dfs) <- c("ID","Prediction","r (Pearson)")
#Do correlations with bulk data using both Spearman and Pearson (and the top 1000 genes):
for (i in 1:ncol(x10))
{
shared<-intersect(row.names(as.matrix(head(sort(x10[,i], decreasing=TRUE),1000))),rownames(kas))
step0<-rcorr(x10[shared,i],kas[shared,1:10],type = "spearman")
step1<-as.matrix(t(step0$r[2:11,1]))
step2<-rcorr(x10[shared,i],kas[shared,1:10],type = "pearson")
step3<-as.matrix(t(step2$r[2:11,1]))
step4<-cbind(colnames(step1)[which.max(step1)],step1[which.max(step1)],colnames(step3)[which.max(step3)],step3[which.max(step3)])
colnames(step4) <- c("Prediction(Spearman)","r(Spearman)","Prediction(Pearsons)","r(Pearsons)")
rownames(step4)<-colnames(x10)[i]
dfs<-rbind(dfs,step4)
}
#Write out data into a csv file:
#write.csv(df,file="/Users/ar19/Desktop/PhD/AR04_GCSKO_project/All_mutants_Feb_2018/predictionkasiacombined.csv")
#Change the format of the output to make it more readable:
#gsub("Pb_","", dfs[,1]) - Make predictions into 18hr.dat format:
dfs[,1] <- gsub("X","", dfs[,1])
#Make into a number:
dfs[,1] <- as.numeric(dfs[,1])
#Make into a number:
dfs[,2] <- as.numeric(as.character(dfs[,2]))
#gsub("Pb_","", dfs[,1]) - Make predictions into 18hr.dat format:
dfs[,3] <- gsub("X","", dfs[,3])
#Make into a number:
dfs[,3] <- as.numeric(dfs[,3])
#dfs[,1]
#Make into a number:
dfs[,4] <- as.numeric(as.character(dfs[,4]))
colnames(dfs) <- c('Prediction(Spearman)_Kasia', 'r(Spearman)_Kasia', 'Prediction(Pearson)_Kasia', 'r(Pearson)_Kasia')
#add to Seurat:
#add to 10X object:
ss2_mutants_final <- AddMetaData(ss2_mutants_final, dfs)
normalise and find variable genes
#old way of running UMAP:
#ss2_mutants_final <-RunUMAP(ss2_mutants_final, reduction = "pca", dims = 1:10, n.neighbors = 150, seed.use = 1234, min.dist = 0.4, repulsion.strength = 0.03, local.connectivity = 150)
ss2_mutants_final <- RunUMAP(ss2_mutants_final, dims = 1:12)
20:09:17 UMAP embedding parameters a = 0.9922 b = 1.112
Found more than one class "dist" in cache; using the first, from namespace 'spam'
Also defined by ‘BiocGenerics’
20:09:17 Read 2970 rows and found 12 numeric columns
20:09:17 Using Annoy for neighbor search, n_neighbors = 30
Found more than one class "dist" in cache; using the first, from namespace 'spam'
Also defined by ‘BiocGenerics’
20:09:17 Building Annoy index with metric = cosine, n_trees = 50
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
20:09:18 Writing NN index file to temp file /var/folders/wj/rztzclxn1t10cl2sk0plbf3r0000gn/T//RtmpXGhNV1/file43372c318018
20:09:18 Searching Annoy index using 1 thread, search_k = 3000
20:09:19 Annoy recall = 100%
20:09:21 Commencing smooth kNN distance calibration using 1 thread
20:09:23 Initializing from normalized Laplacian + noise
20:09:23 Commencing optimization for 500 epochs, with 117082 positive edges
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
20:09:29 Optimization finished
ss2_mutants_final <- RunPCA(ss2_mutants_final, features = VariableFeatures(object = ss2_mutants_final), verbose = FALSE)
PCA plot
DimPlot(ss2_mutants_final, reduction = "pca", group.by = "sub_identity_updated")
ElbowPlot(ss2_mutants_final, ndims = 30, reduction = "pca")
ss2_mutants_final <- FindNeighbors(ss2_mutants_final, dims = 1:21)
ss2_mutants_final <- FindClusters(ss2_mutants_final, resolution = 1)
Improve UMAP
#old way of running UMAP:
#ss2_mutants_final <-RunUMAP(ss2_mutants_final, reduction = "pca", dims = 1:10, n.neighbors = 150, seed.use = 1234, min.dist = 0.4, repulsion.strength = 0.03, local.connectivity = 150)
ss2_mutants_final <- RunUMAP(ss2_mutants_final, dims = 1:12)
p1 <- DimPlot(ss2_mutants_final, group.by = "ident", label = TRUE)
p2 <- DimPlot(ss2_mutants_final, group.by = "identity_updated")
p1 + p2
plot <- FeaturePlot(ss2_mutants_final, features = "nFeature_RNA")
HoverLocator(plot = plot, information = FetchData(ss2_mutants_final, vars = c("nFeature_RNA", "ident", "identity_updated")))
# PBANKA-0515000 - p25 - female
# PBANKA-1319500 - CCP2 - female - used in 820 line
# PBANKA-1212600 - HAP2 - male
# PBANKA-0600600 - NEK3 - male
# PBANKA-1315700 - RON2 - (asexuals and some male?)
# "PBANKA-0416100" - dynenin heavy chain - male - used in 820 line
# PBANKA-1437500 - AP2-G - seuxal commitment gene
# PBANKA-0831000 - MSP1 - late asexual
# PBANKA-1102200 - MSP8 - early asexual (from Bozdech paper)
FeaturePlot(ss2_mutants_final, features = c("PBANKA-0515000", "PBANKA-1319500", "PBANKA-1212600","PBANKA-0600600", "PBANKA-1315700", "PBANKA-0416100", "PBANKA-1437500", "PBANKA-0831000", "PBANKA-1102200"), coord.fixed = TRUE)
Violin plots cluster-by-cluster:
df_meta_data <- as.data.frame(ss2_mutants_final@meta.data)
dot_plot_df <- as.data.frame.matrix(table(df_meta_data$RNA_snn_res.1, df_meta_data$identity_updated))
dot_plot_df_pc <- (as.data.frame.matrix(prop.table(table(df_meta_data$RNA_snn_res.1, df_meta_data$identity_updated), margin = 2)) * 100)
dot_plot_df_pc$cluster <- rownames(dot_plot_df_pc)
library(dplyr)
dot_plot_df_pc_mutated <- mutate(dot_plot_df_pc)
library(reshape2)
dot_plot_df_pc_melted <- melt(dot_plot_df_pc, variable.name = "cluster")
Using cluster as id variables
colnames(dot_plot_df_pc_melted)[2] <- "identity"
library(ggplot2)
p = ggplot(dot_plot_df_pc_melted,
aes(y = factor(cluster),
x = factor(identity))) +
geom_point(aes(colour=value, size=value)) +
scale_color_gradient(low="blue", high="red", limits=c( 1, max(dot_plot_df_pc_melted$value)), na.value="white") +
theme_bw() +
theme(panel.grid.major=element_blank(), panel.grid.minor=element_blank())
p = p +
ylab("Cluster") +
xlab("Identity") +
theme(axis.text.x=element_text(size=12, face="italic", angle=45, hjust=1)) +
theme(axis.text.y=element_text(size=12, face="italic"))
print(p)
This essentially allows us to view which genotypes sit in which clusters. Where we see that WT is not present, we can presume that this mutant has a unique transcriptome (either from the whole WT trajectory or it has arrested and so represents a WT developing form - we can find this out in the next stage of analaysis when we integrate with 10X data).
df_meta_data <- as.data.frame(ss2_mutants_final@meta.data)
dot_plot_df <- as.data.frame.matrix(table(df_meta_data$RNA_snn_res.1, df_meta_data$identity_updated))
dot_plot_df_pc <- (as.data.frame.matrix(prop.table(table(df_meta_data$RNA_snn_res.1, df_meta_data$identity_updated), margin = 2)) * 100)
dot_plot_df_pc$cluster <- rownames(dot_plot_df_pc)
library(dplyr)
dot_plot_df_pc_mutated <- mutate(dot_plot_df_pc)
library(reshape2)
dot_plot_df_pc_melted <- melt(dot_plot_df_pc, variable.name = "cluster")
colnames(dot_plot_df_pc_melted)[2] <- "identity"
## make plots
plots <- FeaturePlot(ss2_mutants_final, features = c("PBANKA-1319500", "PBANKA-0416100"), blend = TRUE, combine = FALSE, coord.fixed = TRUE)
# Get just the co-expression plot, built-in legend is meaningless for this plot
#plots[[3]] + NoLegend()
# Get just the key
#plots[[4]]
# Stitch the co-expression and key plots together
plots[[3]] + NoLegend() + plots[[4]]/plot_spacer() + plot_layout(widths = c(2,1))
Mutants 3, 2, and 13 all seem to display this unique phenotype.
FeaturePlot(ss2_mutants_final, features = c("Prediction.Spearman._Kasia", "Prediction.Spearman."))
Confirm life cycle designations using bulk correlations
FeaturePlot(ss2_mutants_final, features = c("Prediction.Spearman._Kasia", "Prediction.Spearman."))
## read in data
counts_mca <- read.table("../data/Reference/allcounts4.csv", header = TRUE, sep = ",", row.names=1, stringsAsFactors = TRUE)
dim(counts_mca)
anno_mca <- read.delim("../data/Reference/allpheno8.2.csv", header = TRUE, sep = ",", row.names=1)
dim(anno_mca)
## subset all blood stage cells
## find blood stages
blood_stages <- c("Merozoite", "Shz", "Male", "Schizont", "Female", "Trophozoite", "Ring")
blood_stage_cell_names <- rownames(anno_mca[anno_mca$ShortenedLifeStage2 %in% blood_stages, ])
## subset dataframes
counts_mca_blood_stage <- counts_mca[ ,colnames(counts_mca) %in% blood_stage_cell_names]
dim(counts_mca_blood_stage)
anno_mca_blood_stage <- anno_mca[rownames(anno_mca) %in% blood_stage_cell_names, ]
dim(anno_mca_blood_stage)
## remove control cells:
non_control_cell_names <- rownames(anno_mca_blood_stage[anno_mca_blood_stage$Number_of_cells == 1, ])
counts_mca_blood_stage <- counts_mca_blood_stage[ ,colnames(counts_mca_blood_stage) %in% non_control_cell_names]
dim(counts_mca_blood_stage)
anno_mca_blood_stage <- anno_mca_blood_stage[rownames(anno_mca_blood_stage) %in% non_control_cell_names, ]
dim(anno_mca_blood_stage)
## check
identical(rownames(counts_mca_blood_stage), rownames(counts))
mca object
## make Seurat object with same filtering as main object because they are sequenced to same depth:
GCSKO_mca <- CreateSeuratObject(counts = counts_mca_blood_stage, meta.data = anno_mca_blood_stage, min.cells = 0, min.features = 120, project = "GCSKO")
Feature names cannot have underscores ('_'), replacing with dashes ('-')
GCSKO_mca
An object of class Seurat
5245 features across 999 samples within 1 assay
Active assay: RNA (5245 features, 0 variable features)
GCSKO_mca@meta.data$experiment <- "mca"
ss2_mutants_final@meta.data$experiment <- "ss2_mutants"
n.b. a filter of 40 allows the merozoites to complete the ring in the asexuals
## make list
ss2.mca.list <- list(GCSKO_mca, ss2_mutants_final)
ss2.mca.anchors <- FindIntegrationAnchors(object.list = ss2.mca.list, dims = 1:21, verbose = FALSE)
ss2.mca.integrated <- IntegrateData(anchorset = ss2.mca.anchors, dims = 1:21, verbose = FALSE, features.to.integrate = all.genes)
Adding a command log without an assay associated with it
# set during IntegrateData
DefaultAssay(ss2.mca.integrated) <- "integrated"
# Run the standard workflow for visualization and clustering
ss2.mca.integrated <- ScaleData(ss2.mca.integrated, verbose = FALSE)
ss2.mca.integrated <- RunPCA(ss2.mca.integrated, npcs = 30, verbose = FALSE)
## run UMAP
ss2.mca.integrated <- RunUMAP(ss2.mca.integrated, reduction = "pca", dims = 1:15)
20:11:28 UMAP embedding parameters a = 0.9922 b = 1.112
Found more than one class "dist" in cache; using the first, from namespace 'spam'
Also defined by ‘BiocGenerics’
20:11:28 Read 3969 rows and found 15 numeric columns
20:11:28 Using Annoy for neighbor search, n_neighbors = 30
Found more than one class "dist" in cache; using the first, from namespace 'spam'
Also defined by ‘BiocGenerics’
20:11:28 Building Annoy index with metric = cosine, n_trees = 50
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
20:11:30 Writing NN index file to temp file /var/folders/wj/rztzclxn1t10cl2sk0plbf3r0000gn/T//RtmpXGhNV1/file4337275d142d
20:11:30 Searching Annoy index using 1 thread, search_k = 3000
20:11:31 Annoy recall = 100%
20:11:33 Commencing smooth kNN distance calibration using 1 thread
20:11:35 Initializing from normalized Laplacian + noise
20:11:36 Commencing optimization for 500 epochs, with 159194 positive edges
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
20:11:43 Optimization finished
Scale and PCA
VlnPlot(object = ss2.mca.integrated, features = "nFeature_RNA", pt.size = 0.01) + geom_hline(yintercept=100)
Inspect PCS:
## copy old clusters
#ss2.mca.integrated <- AddMetaData(ss2.mca.integrated, ss2.mca.integrated@meta.data$RNA_snn_res.1, col.name = "pre_integration_clusters")
## generate new clusters
#ss2.mca.integrated <- FindNeighbors(ss2.mca.integrated, dims = 1:21)
#ss2.mca.integrated <- FindClusters(ss2.mca.integrated, resolution = 1, random.seed = 42, algorithm = 2)
UMAP
## run UMAP
ss2.mca.integrated <- RunUMAP(ss2.mca.integrated, reduction = "pca", dims = 1:15)
pheno_filtered_cells <- ss2_mutants_final@meta.data
counts_filtered_cells <- ss2_mutants_final@assays$RNA@counts
write.csv(counts_filtered_cells, file = "/Users/Andy/GCSKO/GCSKO_analysis_git/data_to_export/counts_post_qc.csv")
write.csv(pheno_filtered_cells, file = "/Users/Andy/GCSKO/GCSKO_analysis_git/data_to_export/pheno_post_qc.csv")
VlnPlot(object = ss2.mca.integrated, features = "nFeature_RNA", pt.size = 0.01) + geom_hline(yintercept=100)
generate clusters:
## copy old clusters
#ss2.mca.integrated <- AddMetaData(ss2.mca.integrated, ss2.mca.integrated@meta.data$RNA_snn_res.1, col.name = "pre_integration_clusters")
## generate new clusters
#ss2.mca.integrated <- FindNeighbors(ss2.mca.integrated, dims = 1:21)
#ss2.mca.integrated <- FindClusters(ss2.mca.integrated, resolution = 1, random.seed = 42, algorithm = 2)
save counts and pheno without anopheles and control cells
sessionInfo()
R version 4.0.2 (2020-06-22)
Platform: x86_64-apple-darwin17.0 (64-bit)
Running under: macOS Catalina 10.15.6
Matrix products: default
BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
locale:
[1] en_GB.UTF-8/en_GB.UTF-8/en_GB.UTF-8/C/en_GB.UTF-8/en_GB.UTF-8
attached base packages:
[1] stats4 parallel grid stats graphics grDevices utils
[8] datasets methods base
other attached packages:
[1] SingleCellExperiment_1.10.1 SummarizedExperiment_1.18.2
[3] DelayedArray_0.14.1 matrixStats_0.56.0
[5] GenomicRanges_1.40.0 GenomeInfoDb_1.24.2
[7] IRanges_2.22.2 S4Vectors_0.26.1
[9] Biobase_2.48.0 BiocGenerics_0.34.0
[11] KernSmooth_2.23-17 fields_10.3
[13] maps_3.3.0 spam_2.5-1
[15] dotCall64_1.0-0 mixtools_1.2.0
[17] scales_1.1.1 knitr_1.29
[19] reshape2_1.4.4 Hmisc_4.4-0
[21] Formula_1.2-3 survival_3.2-3
[23] lattice_0.20-41 gridExtra_2.3
[25] dplyr_1.0.0 patchwork_1.0.1
[27] ggplot2bdc_0.3.2 cowplot_1.0.0
[29] ggpubr_0.4.0 ggplot2_3.3.2
[31] viridis_0.5.1 viridisLite_0.3.0
[33] Seurat_3.2.0
loaded via a namespace (and not attached):
[1] reticulate_1.16 tidyselect_1.1.0 htmlwidgets_1.5.1
[4] Rtsne_0.15 devtools_2.3.0 munsell_0.5.0
[7] codetools_0.2-16 ica_1.0-2 future_1.18.0
[10] miniUI_0.1.1.1 withr_2.2.0 colorspace_1.4-1
[13] highr_0.8 rstudioapi_0.11 ROCR_1.0-11
[16] ggsignif_0.6.0 tensor_1.5 listenv_0.8.0
[19] labeling_0.3 GenomeInfoDbData_1.2.3 polyclip_1.10-0
[22] farver_2.0.3 pheatmap_1.0.12 rprojroot_1.3-2
[25] vctrs_0.3.2 generics_0.0.2 xfun_0.15
[28] R6_2.4.1 rsvd_1.0.3 bitops_1.0-6
[31] spatstat.utils_1.17-0 assertthat_0.2.1 promises_1.1.1
[34] nnet_7.3-14 gtable_0.3.0 globals_0.12.5
[37] processx_3.4.3 goftest_1.2-2 rlang_0.4.7
[40] splines_4.0.2 rstatix_0.6.0 lazyeval_0.2.2
[43] acepack_1.4.1 hexbin_1.28.1 broom_0.7.0
[46] checkmate_2.0.0 yaml_2.2.1 abind_1.4-5
[49] crosstalk_1.1.0.1 backports_1.1.8 httpuv_1.5.4
[52] tools_4.0.2 usethis_1.6.1 ellipsis_0.3.1
[55] RColorBrewer_1.1-2 sessioninfo_1.1.1 ggridges_0.5.2
[58] Rcpp_1.0.5 plyr_1.8.6 zlibbioc_1.34.0
[61] base64enc_0.1-3 RCurl_1.98-1.2 purrr_0.3.4
[64] ps_1.3.3 prettyunits_1.1.1 rpart_4.1-15
[67] deldir_0.1-28 pbapply_1.4-2 zoo_1.8-8
[70] haven_2.3.1 ggrepel_0.8.2 cluster_2.1.0
[73] fs_1.4.2 tinytex_0.25 magrittr_1.5
[76] data.table_1.12.8 RSpectra_0.16-0 openxlsx_4.1.5
[79] lmtest_0.9-37 RANN_2.6.1 fitdistrplus_1.1-1
[82] pkgload_1.1.0 hms_0.5.3 mime_0.9
[85] evaluate_0.14 xtable_1.8-4 rio_0.5.16
[88] jpeg_0.1-8.1 readxl_1.3.1 testthat_2.3.2
[91] compiler_4.0.2 tibble_3.0.3 crayon_1.3.4
[94] htmltools_0.5.0 segmented_1.2-0 mgcv_1.8-31
[97] later_1.1.0.1 tidyr_1.1.0 MASS_7.3-51.6
[100] Matrix_1.2-18 car_3.0-8 cli_2.0.2
[103] igraph_1.2.5 forcats_0.5.0 pkgconfig_2.0.3
[106] foreign_0.8-80 plotly_4.9.2.1 XVector_0.28.0
[109] stringr_1.4.0 callr_3.4.3 digest_0.6.25
[112] sctransform_0.2.1 RcppAnnoy_0.0.16 spatstat.data_1.4-3
[115] rmarkdown_2.3 cellranger_1.1.0 leiden_0.3.3
[118] htmlTable_2.0.1 uwot_0.1.8 curl_4.3
[121] kernlab_0.9-29 shiny_1.5.0 lifecycle_0.2.0
[124] nlme_3.1-148 jsonlite_1.7.0 carData_3.0-4
[127] desc_1.2.0 fansi_0.4.1 pillar_1.4.6
[130] fastmap_1.0.1 httr_1.4.2 pkgbuild_1.1.0
[133] glue_1.4.1 remotes_2.1.1 zip_2.1.0
[136] spatstat_1.64-1 png_0.1-7 stringi_1.4.6
[139] latticeExtra_0.6-29 memoise_1.1.0 irlba_2.3.3
[142] future.apply_1.6.0 ape_5.4
save counts and pheno objects
pheno_filtered_cells <- ss2_mutants_final@meta.data
counts_filtered_cells <- ss2_mutants_final@assays$RNA@counts
write.csv(counts_filtered_cells, file = "../data_to_export/counts_post_qc.csv")
write.csv(pheno_filtered_cells, file = "../data_to_export/pheno_post_qc.csv")
save the session objects
## save all data
#save.image(file = "../GCSKO_SS2_QC.RData")
#load(file = "../GCSKO_SS2_QC.RData")
## save final object
saveRDS(ss2_mutants_final, file = "../data_to_export/ss2_mutants_final.RDS") ## to read this object back in if needed
ss2_mutants_final <- readRDS("../data_to_export/ss2_mutants_final.RDS")